* Session setup: pin interpreter behaviour to release 8.2, close any open
* log, empty memory, then set the matrix size, memory and paging options.
version 8.2
capture log close
capture clear
set matsize 800
set memory 510m
set more off

* Load the regression data and attach the opinion-survey answers on the
* key term id (old-style merge; the master is sorted on the key first).
use "data_for_regression.dta"
sort term id
merge term id using "Opinion surveys.dta"
* Report the match status, then discard the merge marker.
codebook _merge
drop _merge


****T_9 until T_12:
* For items 9 to 12 of the six questionnaires (T1 T2 C_T P1 P2 C_P):
*  1. fill the four indicators _1 to _4 from the raw string answer,
*  2. build _me, whether the indicator matching the own position is set,
*  3. build the total, equal, others and others_dum summaries.
forvalues item = 9/12 {

	* Step 1. Indicator k is only filled where it is still missing:
	* 1 when the raw answer contains the digit k, 0 when the raw answer is
	* non-empty without that digit (this includes the text not answered).
	foreach q in T1_`item' T2_`item' C_T_`item' P1_`item' P2_`item' C_P_`item' {
		forvalues k = 1/4 {
			replace `q'_`k' = 1 if strpos(`q', "`k'") != 0 & `q' != "" & `q'_`k' == .
			replace `q'_`k' = 0 if ((strpos(`q', "`k'") == 0 & `q' != "") | `q' == "not answered") & `q'_`k' == .
		}
	}

	***ME
	* Step 2. T questionnaires use position_number_T, P questionnaires use
	* position_number_P. _me is 1 when the indicator with the same number as
	* the own position equals 1, 0 when it is not but indicator _1 is known.
	foreach side in T P {
		foreach stem in `side'1 `side'2 C_`side' {
			gen `stem'_`item'_me = 1 if `stem'_`item'_1 == 1 & position_number_`side' == 1
			forvalues k = 2/4 {
				replace `stem'_`item'_me = 1 if `stem'_`item'_`k' == 1 & position_number_`side' == `k'
			}
			replace `stem'_`item'_me = 0 if `stem'_`item'_me == . & `stem'_`item'_1 != .
		}
	}

	****total:
	* Step 3. total is the sum of the four indicators; equal flags a total
	* of exactly 4; others is the total without the own indicator;
	* others_dum flags at least one other indicator set.
	foreach q in T1_`item' T2_`item' C_T_`item' P1_`item' P2_`item' C_P_`item' {
		gen `q'_total = `q'_1 + `q'_2 + `q'_3 + `q'_4
		gen `q'_equal = (`q'_total == 4) if `q'_total <= 4
		gen `q'_others = `q'_total - `q'_me
		gen `q'_others_dum = 1 if `q'_others >= 1 & `q'_others != .
		replace `q'_others_dum = 0 if `q'_others == 0
	}

}

***check the variables:
* Spot check on item 9 only: summary statistics of the total and equal
* variables of the C_T questionnaire. The browse line is kept disabled.
forvalues item = 9/9 {
	*browse monitoring_T T1_`item' T1_`item'_1 T1_`item'_2 T1_`item'_3 T1_`item'_4 T1_`item'_total T1_`item'_equal T1_`item'_me T1_`item'_others T1_`item'_others_dum position_number_T
	summarize C_T_`item'_total C_T_`item'_equal
}

****But people can lie. So isolate MY answer as the average of the answer rated by others
* For items 9 to 12 and each questionnaire, build <questionnaire>_<item>_byothers:
* for a row whose own position is p (1 to 4), the mean of indicator _p over
* all OTHER rows of the same group and term. T questionnaires use
* position_number_T and group_T, P questionnaires use position_number_P
* and group_P. Rows whose position is not 1, 2, 3 or 4 stay missing.
*
* The own position of row i is read with explicit subscripting, because the
* programming if command evaluates one expression, not one per row.
* The work is done row by row: the own value is blanked before the group
* mean is taken, so the mean never includes the row itself.
forvalues j = 9/12 {
	foreach side in T P {
		foreach var in `side'1 `side'2 C_`side' {
			gen `var'_`j'_byothers = .
			local x = _N
			forvalues i = 1/`x' {
				* own position of row i (missing or other values are skipped)
				local pos = position_number_`side'[`i']
				if inlist(`pos', 1, 2, 3, 4) {
					* copy indicator pos, drop the own answer, average the rest
					quietly gen temp_myanswer = `var'_`j'_`pos'
					quietly replace temp_myanswer = . in `i'
					quietly egen temp_mean_myanswer = mean(temp_myanswer), by(group_`side' term)
					quietly replace `var'_`j'_byothers = temp_mean_myanswer in `i'
					drop temp_myanswer temp_mean_myanswer
				}
			}
		}
	}
}


***define as good if others say 1:
* byoth_good is 1 when the mean rating by others equals exactly 1, 0 when
* it is below 1, and missing when that mean is missing or above 1.
forvalues item = 9/12 {
	foreach q in T1_`item' T2_`item' C_T_`item' P1_`item' P2_`item' C_P_`item' {
		gen `q'_byoth_good = (`q'_byothers == 1) if `q'_byothers <= 1
	}
}

***check the variables:
* Spot check on item 9 of questionnaire T1: order the rows by term, group
* and position, then summarize indicator _1 next to the rating by others.
sort term group_T position_number_T
*browse term group_T position_number_T T1_9_1 T1_9_byothers T1_9_byoth_good
summarize T1_9_1 T1_9_byothers


*T1_15

***for T1_15=30,30,15,25, then:
* Some respondents typed all four shares into item 15 as NN,NN,NN,NN.
* For each questionnaire, spread such an answer over items 15 to 18 by
* taking the two characters at positions 1, 4, 7 and 10.
* Each questionnaire reads its OWN item 15. Reading T1_15 for all of them
* would leave packed answers of the other questionnaires unsplit.
* Item 15 is overwritten last because it is the source of the other three.
foreach var in T1 T2 C_T P1 P2 C_P {
	replace `var'_16 = substr(`var'_15, 4, 2) if strmatch(`var'_15, "??,??,??,??") == 1
	replace `var'_17 = substr(`var'_15, 7, 2) if strmatch(`var'_15, "??,??,??,??") == 1
	replace `var'_18 = substr(`var'_15, 10, 2) if strmatch(`var'_15, "??,??,??,??") == 1
	replace `var'_15 = substr(`var'_15, 1, 2) if strmatch(`var'_15, "??,??,??,??") == 1
}

*browse T1_15 T1_16 T1_17 T1_18 
	
* Clean the raw string answers of items 15 to 18 for every questionnaire
* and convert them to numbers. The order of the rules matters and is kept:
* strip the first percent sign (HTML entity or plain), blank non-answers,
* map a few words to numbers, blank anything of five characters or more,
* blank a list of known junk tokens, then destring.
forvalues item = 15/18 {
	foreach stem in T1 T2 C_T P1 P2 C_P {
		local q `stem'_`item'
		replace `q' = subinstr(`q', "&#37;", "", 1)
		replace `q' = subinstr(`q', "%", "", 1)
		replace `q' = "" if `q' == "not answered" | `q' == "?"
		replace `q' = "0" if `q' == "none"
		replace `q' = "25" if `q' == "25,"
		replace `q' = "100" if `q' == "all"
		replace `q' = "" if length(`q') >= 5
		* known junk tokens, one rule per token
		foreach junk in n/a N/A x - good text work {
			replace `q' = "" if `q' == "`junk'"
		}
		destring `q', replace
	}
}

***reweigh if more than 100%
* For each questionnaire, store the sum of items 15 to 18 in <name>_sum and
* rescale each of the four items to a share of that sum times 100.
* NOTE(review): the heading mentions sums above 100 only, but the rescaling
* is applied to every row; a zero or missing sum yields missing shares.
foreach stem in T1 T2 C_T P1 P2 C_P {
	gen `stem'_sum = `stem'_15 + `stem'_16 + `stem'_17 + `stem'_18
	foreach q in `stem'_15 `stem'_16 `stem'_17 `stem'_18 {
		replace `q' = `q'/`stem'_sum*100
	}
}

*browse T2_15 T2_16 T2_17 T2_18 

****clean:
*browse C_P_15 C_P_16 C_P_17 C_P_18 
*codebook T2_15 T2_16 T2_17 T2_18 

****Now isolate MY contribution:
* <name>_mycontrib is the share the respondent gave to the own position:
* item 15 for position 1, item 16 for position 2, item 17 for position 3
* and item 18 for position 4. T questionnaires use position_number_T,
* P questionnaires use position_number_P.
foreach side in T P {
	foreach stem in `side'1 `side'2 C_`side' {
		gen `stem'_mycontrib = `stem'_15 if position_number_`side' == 1
		forvalues k = 2/4 {
			* item number that belongs to position k
			local q = `k' + 14
			replace `stem'_mycontrib = `stem'_`q' if position_number_`side' == `k'
		}
	}
}

*browse T1_15 T1_16 T1_17 T1_18 T1_mycontrib position_number_T P1_mycontrib position_number_P

	
****But people can lie. So isolate MY contribution as the average of my contribution rated by others
sort term term_group_T position_number_T
*browse term term_group_T position_number_T T1_15 T1_16 T1_17 T1_18

* For the T questionnaires, build <name>_mean_mycontrib: for a row whose
* own position is p (1 to 4), the mean of item 14+p over all OTHER rows of
* the same group_T and term. Rows with any other position stay missing.
*
* The own position of row i is read with explicit subscripting, because the
* programming if command evaluates one expression, not one per row.
* The own value is blanked before the group mean is taken, so the mean
* never includes the row itself.
foreach var in T1 T2 C_T {
	gen `var'_mean_mycontrib = .
	local x = _N
	forvalues i = 1/`x' {
		* own position of row i (missing or other values are skipped)
		local pos = position_number_T[`i']
		if inlist(`pos', 1, 2, 3, 4) {
			* item 15 holds the share of position 1, item 18 that of position 4
			local q = `pos' + 14
			quietly gen temp_mycontrib = `var'_`q'
			quietly replace temp_mycontrib = . in `i'
			quietly egen temp_mean_mycontrib = mean(temp_mycontrib), by(group_T term)
			quietly replace `var'_mean_mycontrib = temp_mean_mycontrib in `i'
			drop temp_mycontrib temp_mean_mycontrib
		}
	}
}
	
***same for presentations:
* For the P questionnaires, build <name>_mean_mycontrib: for a row whose
* own position is p (1 to 4), the mean of item 14+p over all OTHER rows of
* the same group_P and term. Rows with any other position stay missing.
*
* The own position is taken from position_number_P, the position variable
* that goes with group_P; using position_number_T here would pick the
* wrong item whenever the two positions of a student differ.
* The own position of row i is read with explicit subscripting, because the
* programming if command evaluates one expression, not one per row.
foreach var in P1 P2 C_P {
	gen `var'_mean_mycontrib = .
	local x = _N
	forvalues i = 1/`x' {
		* own position of row i (missing or other values are skipped)
		local pos = position_number_P[`i']
		if inlist(`pos', 1, 2, 3, 4) {
			* item 15 holds the share of position 1, item 18 that of position 4
			local q = `pos' + 14
			quietly gen temp_mycontrib = `var'_`q'
			* blank the own rating so the mean covers the other rows only
			quietly replace temp_mycontrib = . in `i'
			quietly egen temp_mean_mycontrib = mean(temp_mycontrib), by(group_P term)
			quietly replace `var'_mean_mycontrib = temp_mean_mycontrib in `i'
			drop temp_mycontrib temp_mean_mycontrib
		}
	}
}


***descriptive statistics on difference between:
*-my contribution rated by me
*-my contribution rated by others:
* Summarize the rating by others next to the own rating, first for the
* C_T questionnaire and then for the C_P questionnaire.
*reg mean_mycontrib_C_T C_T_mycontrib	
foreach side in T P {
	summarize C_`side'_mean_mycontrib C_`side'_mycontrib
}

* Disabled inspection of item 3; the loop body holds only a comment.
forvalues item = 3/3 {
	*browse T1_`item' T2_`item' C_T_`item' P1_`item' P2_`item' C_P_`item'
}


****now define variables based on these contributions:
* Per questionnaire:
*  contrib_equal     1 when items 15 to 18 are all exactly 25, 0 otherwise,
*                    missing when item 15 is missing
*  contrib_mean      mean of the four items
*  contrib_sd        square root of the mean squared deviation from that
*                    mean (divisor 4)
*  mycontrib_good    1 when the own rating is at least 25, 0 when below
*  mean_mycont_good  same split applied to the rating by others
foreach q in T1 T2 C_T P1 P2 C_P {
	gen `q'_contrib_equal = (`q'_15 == 25 & `q'_16 == 25 & `q'_17 == 25 & `q'_18 == 25) if `q'_15 != .
	gen `q'_contrib_mean = (`q'_15+`q'_16+`q'_17+`q'_18)/4
	gen `q'_contrib_sd = (((`q'_15-`q'_contrib_mean)^2+(`q'_16-`q'_contrib_mean)^2+(`q'_17-`q'_contrib_mean)^2+(`q'_18-`q'_contrib_mean)^2)/4)^(1/2)

	***dichotomous good/bad:
	gen `q'_mycontrib_good = (`q'_mycontrib >= 25) if `q'_mycontrib != .
	gen `q'_mean_mycont_good = (`q'_mean_mycontrib >= 25) if `q'_mean_mycontrib != .
}

sort term term_group_T position_number_T
*browse term term_group_T position_number_T T1_15 T1_16 T1_17 T1_18 T1_contrib_equal T1_contrib_mean T1_contrib_sd T1_mycontrib_good T1_mean_mycont_good




* Store the survey-augmented data set.
save "data_for_regression_opinion surveys.dta", replace


****merge with coding of messages and denunciations of free riders:
* Old-style merge on the key term id; the master is sorted on the key first.
sort term id
merge term id using "Coding.dta"
codebook _merge
sort _merge
***We keep the students with a quiz grade:
* Only rows found in both files (merge code 3) are retained.
drop if _merge != 3
*drop _merge

count
*browse id term studentname quiz1 final posting_quiz1_quiz2 freerider_reported_assign
***This student dropped the course:
drop if id == 739583739

****clean consent:
* Value label shared by the five consent questions.
label define labelconsent 1 "1.Yes, I wish to fill the course evaluation (the questionnaire will appear in your folder)" 2 "2.No, I do not want to fill the course evaluation"

*browse T1_consent

* Recode each raw consent string: answers containing Yes become 1, answers
* containing No become 2, the text not answered becomes empty. The variable
* is then converted to numeric and labelled.
foreach q in T1_consent T2_consent P1_consent P2_consent C_consent {
	replace `q' = "1" if strpos(`q', "Yes") != 0
	replace `q' = "2" if strpos(`q', "No") != 0
	replace `q' = "" if `q' == "not answered"
	destring `q', replace
	label values `q' labelconsent
}
	
	
	
******CONSENT:
*browse T1_consent T2_consent P1_consent P2_consent C_consent

***drop those who do not consent to the final course evaluation questionnaire:
drop if C_consent == 2


* Store the final analysis file.
save "data_for_regression_opinion surveys_freerider.dta", replace










